In [ ]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import geopandas as gpd
import folium
import networkx as nx
import osmnx as ox
from pyproj import Proj, transform
from shapely.ops import nearest_points
from geopy.distance import geodesic
import pickle
import networkx as nx
from shapely.geometry import LineString
from scipy import spatial
from tqdm import tqdm
import gurobipy as gp

Location by Frequency¶

In this notebook, the location of the inspector is decided only by looking at the frequency of the accident in the road segment. The network shapefile is already provided road section id, hence it is going to be used to discritized the road.

The main work-flow for this method is:

  • Calculate the distance between accident and the road network.
  • Assign the accident according its road section id on the shapefiles.
  • Assign the the road inspector to the highest accident frequency.
  • Evaluate the travel time to each incident.

The calculation of distance between accident and road network¶

The first section is to calculate the distance. In this case, several data filtering is needed. The train and test data split are also done here. Other than that, the coordinate reference systems of the incident and the road network is synchronozed to the WGS84 by using the given algorithm and also existing python module (Pyproj)

In [ ]:
# Functions from the teachers to transform the crs.

def DutchRDtoWGS84(rdX, rdY):
    """ Convert DutchRD to WGS84
    """
    RD_MINIMUM_X = 11000
    RD_MAXIMUM_X = 280000
    RD_MINIMUM_Y = 300000
    RD_MAXIMUM_Y = 630000
    if (rdX < RD_MINIMUM_X or rdX > RD_MAXIMUM_X
        or rdY < RD_MINIMUM_Y or rdY > RD_MAXIMUM_Y):
        resultNorth = -1
        resultEast = -1
        return resultNorth, resultEast
    # else
    dX = (rdX - 155000.0) / 100000.0
    dY = (rdY - 463000.0) / 100000.0
    k = [[3600 * 52.15517440, 3235.65389, -0.24750, -0.06550, 0.0],
        [-0.00738   ,   -0.00012,  0.0    ,  0.0    , 0.0],
        [-32.58297   ,   -0.84978, -0.01709, -0.00039, 0.0],
        [0.0       ,    0.0    ,  0.0    ,  0.0    , 0.0],
        [0.00530   ,    0.00033,  0.0    ,  0.0    , 0.0],
        [0.0       ,    0.0    ,  0.0    ,  0.0    , 0.0]]
    l = [[3600 * 5.38720621,    0.01199,  0.00022,  0.0    , 0.0],
        [5260.52916   ,  105.94684,  2.45656,  0.05594, 0.00128],
        [-0.00022   ,    0.0    ,  0.0    ,  0.0    , 0.0],
        [-0.81885   ,   -0.05607, -0.00256,  0.0    , 0.0],
        [0.0       ,    0.0    ,  0.0    ,  0.0    , 0.0],
        [0.00026   ,    0.0    ,  0.0    ,  0.0    , 0.0]]
    resultNorth = 0
    resultEast = 0
    powX = 1

    for p in range(6):
        powY = 1
        for q in range(5):
            resultNorth = resultNorth + k[p][q] * powX * powY / 3600.0
            resultEast = resultEast + l[p][q] * powX * powY / 3600.0
            powY = powY * dY
        powX = powX * dX
    return resultNorth, resultEast

def WGS84toDutchRD(wgs84East, wgs84North):
    # translated from Peter Knoppers's code

    # wgs84East: longtitude
    # wgs84North: latitude

    # Western boundary of the Dutch RD system. */
    WGS84_WEST_LIMIT = 3.2

    # Eastern boundary of the Dutch RD system. */
    WGS84_EAST_LIMIT = 7.3

    # Northern boundary of the Dutch RD system. */
    WGS84_SOUTH_LIMIT = 50.6

    # Southern boundary of the Dutch RD system. */
    WGS84_NORTH_LIMIT = 53.7

    if (wgs84North > WGS84_NORTH_LIMIT) or \
        (wgs84North < WGS84_SOUTH_LIMIT) or \
        (wgs84East < WGS84_WEST_LIMIT) or \
        (wgs84East > WGS84_EAST_LIMIT):
        resultX = -1
        resultY = -1
    else:
        r = [[155000.00, 190094.945,   -0.008, -32.391, 0.0],
            [-0.705, -11832.228,    0.0  ,   0.608, 0.0],
            [0.0  ,   -114.221,    0.0  ,   0.148, 0.0],
            [0.0  ,     -2.340,    0.0  ,   0.0  , 0.0],
            [0.0  ,      0.0  ,    0.0  ,   0.0  , 0.0]]
        s = [[463000.00 ,      0.433, 3638.893,   0.0  ,  0.092],
            [309056.544,     -0.032, -157.984,   0.0  , -0.054],
            [73.077,      0.0  ,   -6.439,   0.0  ,  0.0],
            [59.788,      0.0  ,    0.0  ,   0.0  ,  0.0],
            [0.0  ,      0.0  ,    0.0  ,   0.0  ,  0.0]]
        resultX = 0
        resultY = 0
        powNorth = 1
        dNorth = 0.36 * (wgs84North - 52.15517440)
        dEast = 0.36 * (wgs84East - 5.38720621)

        for p in range(5):
            powEast = 1
            for q in range(5):
                resultX = resultX + r[p][q] * powEast * powNorth
                resultY = resultY + s[p][q] * powEast * powNorth
                powEast = powEast * dEast
            powNorth = powNorth * dNorth
    return resultX, resultY

def calc_distance(line_wkt):
    line = ogr.CreateGeometryFromWkt(line_wkt)
    points = line.GetPoints()
    d = 0
    for p0, p1 in zip(points, points[1:]):
        d = d + geodesic(p0, p1).m
    return d

if __name__=="__main__":
    x, y = WGS84toDutchRD(4.33, 52.04)
    print(DutchRDtoWGS84(x, y))
(52.03999999894767, 4.330000046074026)
In [ ]:
# Function from the pyproj module to project the network shapefiles entirely
def utm_projection(gdf,initial_crs):
    target_crs = 'EPSG:32631'
    transformer = Proj(init=initial_crs), Proj(init=target_crs)

    gdf_utm = gdf.to_crs(target_crs)
    return(gdf_utm)

def wgs84_projection(gdf, initial_crs):
    target_crs = 'EPSG:4326'
    transformer = Proj(init=initial_crs), Proj(init=target_crs)

    gdf_wgs84 = gdf.to_crs(target_crs)
    return(gdf_wgs84)

In the cell below, the network data shapefile is loaded and transformed.

In [ ]:
# Extract subnetwork
highway_shapefile = 'Shapefiles/Snelheid_Wegvakken.shp'
network_temp = gpd.read_file(highway_shapefile)

# Make a geodataframe of the network and 
# define the crs to make the transformation easier
df_rd = gpd.GeoDataFrame(geometry=network_temp['geometry'], crs='EPSG:28992')

df_utm = utm_projection(df_rd, 'EPSG:28992')
df_wgs84 = wgs84_projection(df_rd, 'EPSG:28992')

# Changing the column to english for easier analysis
df_utm['Road_section_id'] = network_temp['WVK_ID']
df_utm['Road_number'] = network_temp['WEGNR_HMP']
df_wgs84['Road_section_id'] = network_temp['WVK_ID']
df_wgs84['Road_number'] = network_temp['WEGNR_HMP']
c:\Users\syaka\anaconda3\envs\geospatial\Lib\site-packages\pyproj\crs\crs.py:141: FutureWarning: '+init=<authority>:<code>' syntax is deprecated. '<authority>:<code>' is the preferred initialization method. When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6
  in_crs_string = _prepare_from_proj_string(in_crs_string)
c:\Users\syaka\anaconda3\envs\geospatial\Lib\site-packages\pyproj\crs\crs.py:141: FutureWarning: '+init=<authority>:<code>' syntax is deprecated. '<authority>:<code>' is the preferred initialization method. When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6
  in_crs_string = _prepare_from_proj_string(in_crs_string)
c:\Users\syaka\anaconda3\envs\geospatial\Lib\site-packages\pyproj\crs\crs.py:141: FutureWarning: '+init=<authority>:<code>' syntax is deprecated. '<authority>:<code>' is the preferred initialization method. When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6
  in_crs_string = _prepare_from_proj_string(in_crs_string)
c:\Users\syaka\anaconda3\envs\geospatial\Lib\site-packages\pyproj\crs\crs.py:141: FutureWarning: '+init=<authority>:<code>' syntax is deprecated. '<authority>:<code>' is the preferred initialization method. When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6
  in_crs_string = _prepare_from_proj_string(in_crs_string)

The incident data frame is loaded and filtered for only the highway incidents. In this part, the train-test data split is also made.

In [ ]:
# Load incident csv data
incident = pd.read_csv('incidents19Q3Q4.csv')

# Rename the column for easier analysis
incident.rename(columns = {'vild_primair_wegnummer':'Road_number', 
                           'primaire_locatie_lengtegraad':'longitude', 
                           'primaire_locatie_breedtegraad':'latitude'}, inplace=True)

# Seperate the highway data
incident = incident.loc[np.where(incident['Road_number'].str[0] == 'A')]

# Train-test data split
data = incident[['longitude', 'latitude']].values
num_samples = len(data)
num_samples_keep = int(0.8*num_samples)

np.random.shuffle(data)
train_data = incident.iloc[:num_samples_keep]
test_data = incident.iloc[num_samples_keep:]

train_data
Out[ ]:
Unnamed: 0 id type starttime_new endtime_new Road_number longitude latitude
0 0 A-ALL-IM19087676NLD_1 vehicle_obstruction 2019-08-28 12:11:32 2019-12-11 11:32:28 A1 4.974663 52.346931
1 3 A-ALL-IM19087677NLD_1 vehicle_obstruction 2019-08-28 12:11:32 2019-12-11 11:32:28 A9 4.716725 52.514820
2 5 A-ALL-IM19087678NLD_1 vehicle_obstruction 2019-08-28 12:11:32 2019-12-11 11:32:28 A9 4.738364 52.609730
3 7 A-ALL-IM19087679NLD_1 vehicle_obstruction 2019-08-28 12:11:32 2019-12-11 11:32:28 A35 6.824692 52.204929
4 174938 A-ALL-IM19087680NLD_1 vehicle_obstruction 2019-08-28 12:11:32 2019-12-11 11:32:28 A4 4.346407 52.041920
... ... ... ... ... ... ... ... ...
71333 337797 RWS03_804572_1 vehicle_obstruction 2019-10-29 22:19:50 2019-10-29 23:35:46 A2 5.791368 51.003658
71334 337800 RWS03_804573_1 general_obstruction 2019-10-29 22:32:55 2019-10-29 23:02:59 A15 4.372907 51.875309
71335 337803 RWS03_804574_1 accident 2019-10-29 22:41:16 2019-10-29 23:56:46 A1 6.343580 52.244808
71336 337804 RWS03_804575_1 accident 2019-10-29 22:49:03 2019-10-29 23:12:35 A9 4.705645 52.396221
71337 337806 RWS03_804577_1 vehicle_obstruction 2019-10-29 23:03:09 2019-10-30 00:18:46 A13 4.351567 52.038780

57690 rows × 8 columns

In [ ]:
 

Based on the train data, the geodataframe of the incident is made.

In [ ]:
# Incident geodataframe from the train dataset
Geometry = gpd.points_from_xy(train_data.latitude, train_data.longitude)
gdf_incident_wgs84 = gpd.GeoDataFrame(train_data, geometry=Geometry, crs='EPSG:4326')
gdf_incident_utm = utm_projection(gdf_incident_wgs84, 'EPSG:4326')
c:\Users\syaka\anaconda3\envs\geospatial\Lib\site-packages\pyproj\crs\crs.py:141: FutureWarning: '+init=<authority>:<code>' syntax is deprecated. '<authority>:<code>' is the preferred initialization method. When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6
  in_crs_string = _prepare_from_proj_string(in_crs_string)
c:\Users\syaka\anaconda3\envs\geospatial\Lib\site-packages\pyproj\crs\crs.py:141: FutureWarning: '+init=<authority>:<code>' syntax is deprecated. '<authority>:<code>' is the preferred initialization method. When making the change, be mindful of axis order changes: https://pyproj4.github.io/pyproj/stable/gotchas.html#axis-order-changes-in-proj-6
  in_crs_string = _prepare_from_proj_string(in_crs_string)
In [ ]:
# Incident heatmap visualisation
from folium import plugins
map = folium.Map(location=[52.399190, 4.893658])
gjson = folium.features.GeoJson(
    df_wgs84,
).add_to(map)

# List of accident for the heatmap visualisation
geo_df_list = [[gdf_incident_wgs84['geometry'].iloc[i].xy[0][0],
                gdf_incident_wgs84['geometry'].iloc[i].xy[1][0]] for i in range(len(gdf_incident_wgs84.geometry))]

incident_a200_mark = plugins.HeatMap(geo_df_list).add_to(map)

map
Out[ ]:
Make this Notebook Trusted to load map: File -> Trust Notebook

The cell below is the visualisation of the accident points. However, it is made be the comment to reduce the computation resource for this notebook.

In [ ]:
from folium import plugins
map = folium.Map(location=[52.3469314575, 4.9746627808], zoom_start=100)
gjson = folium.features.GeoJson(
    df_wgs84,
).add_to(map)

geo_df_list = [[gdf_incident_wgs84['geometry'].iloc[i].xy[0][0],
                gdf_incident_wgs84['geometry'].iloc[i].xy[1][0]] for i in range(len(gdf_incident_wgs84['geometry']))]

for i in range(len(gdf_incident_wgs84['geometry'])):
    folium.Marker(location=geo_df_list[i], popup=geo_df_list[i]).add_to(map)

map

Assign the incident to the road segment¶

In this part all the incident points are assigned to the nearest road network. However, it could be found that there are several mismatch data points. To solve this problem, only the data within 5 km to the network is considered. The cell below is function definition to calculate actual distance between the incident and the road network.

In [ ]:
def point2line_distance(point, line_geodf):
    """
    This function calculate the euclidian distance between point to the nearest road network
    Input: incident point and linestring of the road network
    output: the distance in meter, road section of the incident and the number of the accident.
    """
    distance_list = []
    nearest_point_list = []
    road_section_list = []
    road_number_list = []
    
    for idx, row in line_geodf.iterrows():
        road_num = row['Road_number']
        road_section = row['Road_section_id']
        nearest_points_on_line, nearest_point = nearest_points(row['geometry'], point)
        
        # Check if nearest_points_on_line is not empty before calculating distance
        if nearest_points_on_line:
            distance = geodesic((nearest_points_on_line.y, nearest_points_on_line.x), (point.x, point.y)).meters
            nearest_point_list.append(nearest_points_on_line)
            distance_list.append(distance)
            road_section_list.append(road_section)
            road_number_list.append(road_num)

    if distance_list:
        distance2line = min(distance_list)
        id = distance_list.index(distance2line)
        road_section_id = road_section_list[id]
        road_number_accident = road_number_list[id]
    else:
        # Handle the case where no nearest points were found
        distance2line = None
        road_section_id = None
        road_number_accident = None
    
    return distance2line, road_section_id, road_number_accident

The cell below is the execution of the function above to find the incident location along the road network. This cell is commented.

In [ ]:
# ALERT!!! THIS CELL TAKES 3 HOURS TO RUN, THE OBTAINED DATA IN THIS CELL IS SAVED IN THE CSV FILE
distancetoline = []
road_section_idx = []
road_numb = []

for idx, row in gdf_incident_wgs84.iterrows():
    numnum = row['Road_number']
    network_set = df_wgs84.loc[np.where(df_wgs84['Road_number'] == numnum)]
    distance, id_road, road_number = point2line_distance(row['geometry'], network_set)
    distancetoline.append(distance)
    road_section_idx.append(id_road)
    road_numb.append(road_number)

After the calculation, the result is saved in the data frame. It is because of the computational time of the model is high.

In [ ]:
# Define empty data frame
distance_calculation = pd.DataFrame()

# Dataframe definition according to the road section id assignment
distance_calculation['Distance_to_line_meters'] = distancetoline
distance_calculation['Road_section_id'] = road_section_idx
distance_calculation['Road_number'] = road_numb

# Save the result into csv file
distance_calculation.to_csv('distance_calculation')

Re-read the data for the analysis and then keep the points within 5 km from the network.

In [ ]:
# Load the road section assignment data
distance_calculation = pd.read_csv('distance_calculation')
distance_calculation = distance_calculation.dropna()
distance_calculation.reset_index(inplace=True)
distance_calculation
Out[ ]:
index Unnamed: 0 Distance_to_line_meters Road_section_id Road_number
0 0 0 106.542014 253369098.0 A1
1 1 1 118.453228 218405001.0 A9
2 2 2 42.934743 221426011.0 A9
3 3 3 85.899006 600208581.0 A35
4 4 4 38.520854 166301026.0 A4
... ... ... ... ... ...
57547 57685 57685 1177.563364 600142850.0 A2
57548 57686 57686 97.037900 600107567.0 A15
57549 57687 57687 615.074453 600149717.0 A1
57550 57688 57688 760.175507 215385013.0 A9
57551 57689 57689 275.936673 600440957.0 A13

57552 rows × 5 columns

The networkx of the road is saved in the pickle format by other part of this assignment. The number of accident than assigned to the road section in the networkx.

In [ ]:
# Load the pickle file
G = pickle.load(open('NetworkX_graph_new.pickle', 'rb'))
In [ ]:
# Count the number of accident for each road section
grouped_section_id = distance_calculation.groupby(['Road_section_id']).count()
grouped_section_id
Out[ ]:
index Unnamed: 0 Distance_to_line_meters Road_number
Road_section_id
61173050.0 3 3 3 3
62174080.0 1 1 1 1
62174084.0 3 3 3 3
63175041.0 10 10 10 10
65177009.0 1 1 1 1
... ... ... ... ...
600460664.0 2 2 2 2
600460786.0 5 5 5 5
600462009.0 12 12 12 12
600462010.0 1 1 1 1
600462870.0 1 1 1 1

7904 rows × 4 columns

In [ ]:
# Get dict of road_id's and corresponding edges
road_id_dict = nx.get_edge_attributes(G, 'Road_section_id')

# Make a list of the number of the incident.
incident_id_list = list(grouped_section_id.index)

# Create new attributes for the number of incident
nx.set_edge_attributes(G, 0, 'Number_of_accident')

# Loop over all edges
for key in road_id_dict:
    # Find corresponding edge in merged_network dataframe
    road_id = road_id_dict[key]
    if road_id in incident_id_list:
        # Get number of accident
        num_accident = float(grouped_section_id.loc[road_id][0])
        
        # Add number of accident to graph
        G.edges[key]['Number_of_accident'] = num_accident
    else:
        continue
In [ ]:
# Safe graph to file
pickle.dump(G, open('NetworkX_graph_new.pickle', 'wb'))

Road Inspector Assignment¶

Based on the incident frequency, the inspectors are assigned.

In [ ]:
# Load the road section id on the network once again
road_id_dict = nx.get_edge_attributes(G, 'Road_section_id')
In [ ]:
# Rank the frequency of the incident.
ranked_road_section = grouped_section_id.sort_values(by=['Distance_to_line_meters'], ascending=False)

# create the function for the location list
def location_list_rank_only(ranked_road_section, road_id_dict,inspector_number):
    # slice the id of the road
    inspector_section_id = ranked_road_section.iloc[0:inspector_number].index
    # empty list to store the location
    inspector_location = []

    #select the coordinate of the road section id
    for i in range(0,inspector_number):
        road_section = inspector_section_id[i]
        for key in road_id_dict:
            if road_section == road_id_dict[key]:
                location = key[0]
        #store the location to the list
        inspector_location.append(location)

    inspector_location_wgs84 = []
    for i in range(len(inspector_location)):
        inspector_location_wgs = DutchRDtoWGS84(inspector_location[i][0],inspector_location[i][1])
        inspector_location_wgs84.append(inspector_location_wgs)

    return inspector_location_wgs84

bb = location_list_rank_only(ranked_road_section, road_id_dict,12)
bb
Out[ ]:
[(52.11403737842036, 4.46432990551027),
 (52.077426203822945, 4.404451436803937),
 (51.470078943087096, 5.404911522766767),
 (52.11158511681282, 5.048051297585474),
 (52.41687629512844, 4.867038473550502),
 (51.41023367095525, 5.430407986197486),
 (51.52121573297706, 5.231189024064484),
 (51.53430020931579, 5.18344471682761),
 (51.91418351016589, 4.985770376705334),
 (52.0600784930308, 4.731318538557549),
 (52.165200959133344, 5.479357451231505),
 (51.91113392315134, 4.5374885931250555)]

Travel Time¶

A loop to calculate travel time

In [ ]:
# travel_time function
def travel_time_func_freq(point1, point2, time='min'):
    """This function uses the information given in network X to return the travel time between two points.
        point1 and point2 should be tuples with the coordinates in longitude, latitude.
        if time = 'peak', the peak travel time is used. In all other cases the minimum travel time is used."""

    # Determine which travel times to use
    if time == 'peak':
        time_string = 'Peak_travel_time_[s]'
    else:
        time_string = 'Min_travel_time_[s]'

    # Change points to Dutch system
    # p1_x, p1_y = WGS84toDutchRD(point1[0], point1[1]) # inspector
    # p2_x, p2_y = WGS84toDutchRD(point2[0], point2[1]) # incident
    # If it is for the frequency method, does not need to change it
    p1_x, p1_y = (point1[0], point1[1]) # inspector
    p2_x, p2_y = (point2[0], point2[1]) # incident

    # Create numpy matrix from nodes
    A = np.array(list(G.nodes()))

    # Get node closest to each point
    dist_node1, index_node1 = spatial.KDTree(A).query([p1_x, p1_y])
    node1 = (A[index_node1][0], A[index_node1][1])

    dist_node2, index_node2 = spatial.KDTree(A).query([p2_x, p2_y])
    node2 = (A[index_node2][0], A[index_node2][1])

    # Get shortest path between nodes
    #route = nx.shortest_path(G, node1, node2, time_string)
    travel_time = nx.shortest_path_length(G, node1, node2, time_string)

    return travel_time

The cell below is used to calculate all the travel time from each incident to the closest inspector. The formulation is for certain number of the inspector.

In [ ]:
# the load of the nodes into the array for the kdtree
A = np.array(list(G.nodes()))
In [ ]:
# making of kdtree for closest nodes identification
kdtree = spatial.cKDTree(A)
In [ ]:
# train data and inspector location preparation
inspectors = location_list_rank_only(ranked_road_section, road_id_dict,120)

inspector_list = [WGS84toDutchRD(inspectors[i][1], inspectors[i][0]) for i in range(len(inspectors))]
train_list = [WGS84toDutchRD(row.longitude, row.latitude) for idx, row in gdf_incident_wgs84.iterrows()]

train_array = np.array(train_list)
inspector_array = np.array(inspector_list)
In [ ]:
# calculation of the travel time if there is no optimization.
def travel_time_check(inspector_in_list):
    """This function is taking the list of inspectors and calculate it directly to the incidents point
    The outputs are travel time in list and the list of inspectors coordinates assigned to each incidents"""
    assigned_inspector = []
    travel_time_list = []
    for incident in tqdm(train_list, mininterval=0.00001):
        closest_inspector_index = kdtree.query(incident, k=1)[1]
        closest_inspector = inspector_list[closest_inspector_index % len(inspector_list)]
        wgs_closest_inspector = DutchRDtoWGS84(closest_inspector[0], closest_inspector[1])
        assigned_inspector.append(wgs_closest_inspector)
        time = travel_time_func_freq(closest_inspector, incident, time='min')
        travel_time_list.append(time)

    return assigned_inspector, travel_time_list

The cell below is calculating all the distance from the inspector to the incident if the optimization is not done. It is used as the indication for the optimum location.

In [ ]:
inspector_longitude_120 = [inspectors[i][1] for i in range(len(inspectors))]
inspector_latitude_120 = [inspectors[i][0] for i in range(len(inspectors))]

inspector_200 = pd.DataFrame()
inspector_200['inspector_longitude'] = inspector_longitude_120
inspector_200['inspector_latitude'] = inspector_latitude_120

incident_200_latitude = [row.latitude for idx, row in gdf_incident_wgs84.iterrows()]
incident_200_longitude = [row.longitude for idx, row in gdf_incident_wgs84.iterrows()]
assigned_inspector_200_latitude = [assigned_inspector[i][0] for i in range(len(assigned_inspector))]
assigned_inspector_200_longitude = [assigned_inspector[i][1] for i in range(len(assigned_inspector))]

incident_200_inspector = pd.DataFrame()
incident_200_inspector['incident_latitude'] = incident_200_latitude
incident_200_inspector['incident_longitude'] = incident_200_longitude
incident_200_inspector['inspector_latitude'] = assigned_inspector_200_latitude
incident_200_inspector['inspector_longitude'] = assigned_inspector_200_longitude
incident_200_inspector['travel_time[s]'] = travel_time_list
In [ ]:
incident_200_inspector.to_csv('incident_120_inspector')
inspector_200.to_csv('inspector_location_120')

The cell below load assignment result if there is no optimization takes place.

In [ ]:
incident_120_inspector = pd.read_csv('incident_120_inspector')
inspector_location_120 = pd.read_csv('inspector_location_120')

Optimization¶

Optimization uses Gurobi as the tool. The optimization has the objective to minimize travel time to every incidents. The model input is the possible location of the inspectors based on their frequency. Furthermore, decision variables are defined. The binary variables of the inspectors location is stored in the inspector_open variable. The assignment of the inspector to each incident is also defined as the binary values in array shape.

Inspector number is set to 47 as the comparison to other optimization method.

The warm start is used in this method to get the initial solution for the further step of the optimization. The iteration is done for several times.

In [ ]:
# the definition of the candidates
inspectors = location_list_rank_only(ranked_road_section, road_id_dict,500)

inspector_list = [WGS84toDutchRD(inspectors[i][1], inspectors[i][0]) for i in range(len(inspectors))]

# the number of sample for the optimization process
# the result from old samples is used as the initial solution for the new sample
# optimization as the 'warm start'
old_sample = 100
new_sample = 2000

# train data preparation
train_list = [WGS84toDutchRD(row.longitude, row.latitude) for idx, row in gdf_incident_wgs84.iterrows()]
train_list = train_list[0:new_sample]

# number of inspector definition for the optimization
ins_num = 47
In [ ]:
# model definition
model = gp.Model(name='inspector location optimization')
In [ ]:
# setting binary variables for the chosen inspector
inspector_open = model.addVars(inspector_list, vtype=gp.GRB.BINARY, name="inspector_open")
In [ ]:
# for a warm start: set an initial solution for the next iteration
for i in inspector_list:
    if i in chosen_inspectors:
        inspector_open[i].start = 1
    else:
        inspector_open[i].start = 0
In [ ]:
# setting binary variables for the assignment of the inspector
# to each incident
flow = {}
for i in train_list:
    for j in inspector_list:
        flow[i,j] = model.addVar(vtype=gp.GRB.BINARY, name=f"flow_{i}_{j}")


# for a warm start: set an initial solution for the next iteration
for i in train_list[0:old_sample]:
    incident_index = train_list.index(i)
    for j in inspector_list:
        inspector_index = inspector_list.index(j)
        if  assigned_flows[incident_index, inspector_index] > 0.5:
            flow[i, j].start = 1
        else:
            flow[i, j].start = 0
In [ ]:
# Set the maximum number of iterations, and threads 
# to reduce the computational time
max_iterations = 50
model.Params.IterationLimit = max_iterations
model.Params.Threads = 4
Set parameter IterationLimit to value 50
Set parameter Threads to value 4

The additional constraint for this problem is the number of inspector (as stated before) and the incident needs to be assigned to one inspector.

In [ ]:
# set the objective function for minimizing the travel time
# each incident
objective = model.setObjective(gp.quicksum(
                                flow[i, j] * travel_time_func_freq(j, i, time='min')
                                for i in train_list for j in inspector_list
                                ), 
                                sense=gp.GRB.MINIMIZE)


# set constraints
accident_constraint = model.addConstrs(gp.quicksum(flow[i, j] for j in inspector_list) == 1 for i in train_list)
number_const = model.addConstr(gp.quicksum(inspector_open[i] for i in inspector_list) == ins_num)
In [ ]:
# run the optimization
model.optimize()
Gurobi Optimizer version 10.0.3 build v10.0.3rc0 (win64)

CPU model: 12th Gen Intel(R) Core(TM) i7-12700H, instruction set [SSE2|AVX|AVX2]
Thread count: 14 physical cores, 20 logical processors, using up to 4 threads

Optimize a model with 2001 rows, 1000500 columns and 1000500 nonzeros
Model fingerprint: 0xed7363b9
Variable types: 0 continuous, 1000500 integer (1000500 binary)
Coefficient statistics:
  Matrix range     [1e+00, 1e+00]
  Objective range  [3e-11, 4e+04]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 5e+01]

Warning: Completing partial solution with 950500 unfixed non-continuous variables out of 1000500
User MIP start did not produce a new incumbent solution
User MIP start violates constraint R2000 by 47.000000000

Found heuristic solution: objective 6330289.5513
Presolve removed 2001 rows and 1000500 columns
Presolve time: 0.31s
Presolve: All rows and columns removed

Explored 0 nodes (0 simplex iterations) in 1.06 seconds (0.59 work units)
Thread count was 1 (of 20 available processors)

Solution count 2: 382842 6.33029e+06 

Optimal solution found (tolerance 1.00e-04)
Best objective 3.828420247959e+05, best bound 3.828420247959e+05, gap 0.0000%
In [ ]:
# write the chosen inspector
if model.status == gp.GRB.OPTIMAL:
    for i in inspector_list:
        if inspector_open[i].x > 0.5:
            print(f"Inspector {i} is open.")
Inspector (214661.63196313646, 522281.05826042645) is open.
Inspector (126176.2665035578, 485020.65824860364) is open.
Inspector (147082.94392638467, 477083.8361718136) is open.
Inspector (85386.7756160895, 434543.49486099585) is open.
Inspector (191308.23860988903, 349167.042247853) is open.
Inspector (101195.83148977588, 429098.5849161285) is open.
Inspector (182240.4021482885, 468109.0738846255) is open.
Inspector (86687.68324316983, 454221.19664709666) is open.
Inspector (110918.40722755156, 407579.4760459118) is open.
Inspector (84253.60863873496, 451445.50471651094) is open.
Inspector (125100.81850496237, 483520.16222288396) is open.
Inspector (95783.30891428067, 440595.263199818) is open.
Inspector (181784.5864279497, 425775.92177486286) is open.
Inspector (120113.78568383006, 410747.9360088892) is open.
Inspector (82736.10825107961, 450155.104838198) is open.
Inspector (123640.03100311589, 423974.44293664006) is open.
Inspector (165567.49397750318, 510096.27667052066) is open.
Inspector (179716.7907082473, 321766.7188275872) is open.
Inspector (109080.76246324355, 427841.71899162186) is open.
Inspector (88570.21274827162, 431967.73791086755) is open.
Inspector (85035.49383474121, 436022.90689302044) is open.
Inspector (117986.85741325465, 426372.78992835456) is open.
Inspector (106590.04512824278, 449434.86588360363) is open.
Inspector (124484.38586808434, 427290.53492516524) is open.
Inspector (124628.5079717082, 491315.3255665832) is open.
Inspector (182374.3340052628, 550575.0912225958) is open.
Inspector (176658.73222433127, 483991.4801265802) is open.
Inspector (96536.22415364183, 430962.84290836327) is open.
Inspector (117935.61241897251, 426348.9889284405) is open.
Inspector (136929.74913511562, 447699.6229295262) is open.
Inspector (86394.50529746643, 431940.7709151198) is open.
Inspector (118376.81095857426, 490522.76328957244) is open.
Inspector (199448.15840706875, 563933.8811156686) is open.
Inspector (197598.5671797051, 483674.2558943391) is open.
Inspector (174829.71521617638, 365274.47518706304) is open.
Inspector (117896.17310855103, 486131.98022274824) is open.
Inspector (141402.26015562806, 434573.32990540593) is open.
Inspector (130003.50806118165, 459548.38798575127) is open.
Inspector (166103.83203645007, 478110.81212021405) is open.
Inspector (157178.64200248106, 463505.43999739335) is open.
Inspector (122691.64745424612, 482386.80319667433) is open.
Inspector (202592.5766580128, 384330.0190249146) is open.
Inspector (213579.78036329008, 472663.4965730284) is open.
Inspector (88341.8106889584, 421927.3808537196) is open.
Inspector (82760.78827000303, 432004.7349239328) is open.
Inspector (110345.5360842677, 393358.94816566753) is open.
Inspector (118408.25851442969, 483408.53259851487) is open.

The results are depicted in the cell below

In [ ]:
inspector = pd.read_csv('inspector_47_frequency')
inspector
Out[ ]:
Unnamed: 0 inspector_longitude inspector_latitude
0 0 4.464330 52.114037
1 1 4.404451 52.077426
2 2 5.404912 51.470079
3 3 5.048051 52.111585
4 4 4.867038 52.416876
5 5 5.430408 51.410234
6 6 5.231189 51.521216
7 7 5.183445 51.534300
8 8 4.985770 51.914184
9 9 4.731319 52.060078
10 10 5.479357 52.165201
11 11 4.537489 51.911134
12 12 5.827633 52.026531
13 13 4.816565 52.065026
14 14 4.986789 52.187305
15 15 5.095632 51.981006
16 16 4.830527 52.067922
17 17 4.670357 52.438107
18 18 5.140929 51.945572
19 19 4.907971 52.072231
20 20 4.654694 52.022318
21 21 4.705525 52.040203
22 22 4.546976 51.899274
23 23 4.709110 52.042374
24 24 4.617865 51.979827
25 25 4.982492 52.227373
26 26 4.731214 52.296520
27 27 5.480238 52.165253
28 28 4.901743 52.071198
29 29 4.561805 52.166344
30 30 5.020996 51.953294
31 31 4.986765 52.171137
32 32 5.198169 52.092588
33 33 5.797091 51.009008
34 34 4.884440 52.339617
35 35 4.849647 52.401455
36 36 5.095351 51.969643
37 37 4.747886 52.363902
38 38 5.875799 52.025226
39 39 5.672975 52.012722
40 40 5.257510 52.105949
41 41 5.157991 52.076872
42 42 4.752857 51.526333
43 43 4.523877 52.147799
44 44 4.965131 52.080664
45 45 4.882668 51.686942
46 46 4.738101 52.064522
In [ ]:
incident_to_inspector = pd.read_csv('incident_47_inspector_frequency')
incident_to_inspector
Out[ ]:
Unnamed: 0 incident_latitude incident_longitude inspector_latitude inspector_longitude travel_time[s]
0 0 52.346931 4.974663 52.042374 4.709110 2044.186394
1 1 52.514820 4.716725 51.865601 5.720545 4290.843888
2 2 52.609730 4.738364 51.713488 5.341347 3889.921043
3 3 52.204929 6.824692 52.684687 6.269522 4003.598620
4 4 52.041920 4.346407 52.076872 5.157991 2219.686538
... ... ... ... ... ... ...
57685 57685 51.003658 5.791368 51.408404 5.567596 3383.915146
57686 57686 51.875309 4.372907 51.493147 5.316205 3201.669130
57687 57687 52.244808 6.343580 52.438107 4.670357 6326.814777
57688 57688 52.396221 4.705645 52.178162 5.180296 2582.648111
57689 57689 52.038780 4.351567 51.800999 4.459076 1509.140796

57690 rows × 6 columns

The cells after this section are for the documentation of the travel time and the inspector location.

In [ ]:
chosen_inspectors = []
assigned_flows =np.zeros((len(train_list),len(inspector_list)))
if model.status == gp.GRB.OPTIMAL:
    for i in inspector_list:
        if inspector_open[i].x > 0.5:
            chosen_inspectors.append(inspector_list)

    for i in train_list:
        for j in inspector_list:
            if flow[i,j].x >0.5:
                assigned_flows[train_list.index(i),inspector_list.index(j)] = 1
In [ ]:
train_list = [WGS84toDutchRD(row.longitude, row.latitude) for idx, row in gdf_incident_wgs84.iterrows()]
optim_inspector, travel_time_optim = travel_time_check(chosen_inspectors)
100%|██████████| 57690/57690 [26:08<00:00, 36.77it/s]  
In [ ]:
sum(travel_time_optim)/len(travel_time_optim)/60
Out[ ]:
59.4369490021223
In [ ]:
test_data.to_csv('test_data_syakal')

After this, Trial and error section¶

In [ ]:
closest_inspector in inspector_list
Out[ ]:
True
In [ ]:
incident in train_list
Out[ ]:
True
In [ ]:
travel_time_func_freq((167629.33865868987, 379988.9011139234), (83943.80192530078, 450556.6383467211), time='min')/60
Out[ ]:
75.45638111053375
In [ ]:
plt.plot(range(start,end+1), mean_travel_time)
plt.axhline(18, color='red', label='existing average')
plt.axhline(14, color='green')
plt.title('Number of Inspector vs Average Travel Time')
plt.xlabel('Number of inspector')
plt.ylabel('Average Travel Time (minutes)')
plt.show()
---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
c:\Users\syaka\Documents\TUD Lecture Files\CIEM6302 - Advanced Data Science for TTE\Repo\RWS-group3\road_inspector_location.ipynb Cell 42 line 1
----> <a href='vscode-notebook-cell:/c%3A/Users/syaka/Documents/TUD%20Lecture%20Files/CIEM6302%20-%20Advanced%20Data%20Science%20for%20TTE/Repo/RWS-group3/road_inspector_location.ipynb#Y105sZmlsZQ%3D%3D?line=0'>1</a> plt.plot(range(start,end+1), mean_travel_time)
      <a href='vscode-notebook-cell:/c%3A/Users/syaka/Documents/TUD%20Lecture%20Files/CIEM6302%20-%20Advanced%20Data%20Science%20for%20TTE/Repo/RWS-group3/road_inspector_location.ipynb#Y105sZmlsZQ%3D%3D?line=1'>2</a> plt.axhline(18, color='red', label='existing average')
      <a href='vscode-notebook-cell:/c%3A/Users/syaka/Documents/TUD%20Lecture%20Files/CIEM6302%20-%20Advanced%20Data%20Science%20for%20TTE/Repo/RWS-group3/road_inspector_location.ipynb#Y105sZmlsZQ%3D%3D?line=2'>3</a> plt.axhline(14, color='green')

NameError: name 'start' is not defined
In [ ]:
# the most optimised is 71
frequency_inspector_location_rerun, frequency_travel_time_per_incident_rerun, mean = travel_time_frequency(71)
frequency_travel_time_per_incident_rerun.to_csv('frequency_travel_time_per_incident_rerun')
frequency_inspector_location_rerun.to_csv('frequency_inspector_location_rerun')
In [ ]:
pd.read_csv('frequency_inspector_location_rerun')
In [ ]:
m = folium.Map(location=[52.3469314575, 4.9746627808], zoom_start=10)

geo = []
for i in range(len(frequency_inspector_location['inspector_latitude'])):
    app = []
In [ ]:
from folium import plugins

# Create a map
mymap = folium.Map(location=[52.3469314575, 4.9746627808], zoom_start=10)

# Iterate through inspector locations and add markers
for lat, lon in zip(frequency_inspector_location['inspector_latitude'], frequency_inspector_location['inspector_longitude']):
    folium.Marker(location=[lat, lon], popup=f"Lat: {lat}, Lon: {lon}").add_to(mymap)

# Display the map
mymap
In [ ]:
travel_time_incident= []
for j in range(1499,len(incident_longitude)):
    travel_time_inspector = []
    for inspector in location_list:
        point_inspector = (inspector[1],inspector[0])
        point_accident = (incident_longitude[j], incident_latitude[j])
        travelled_time = travel_time_func(point_inspector, point_accident, time='min')
        travel_time_inspector.append(travelled_time)
    travel_time_incident.append(travel_time_inspector)
    print('incident counter:{}'.format(j+1))
In [ ]:
#a = np.array(travel_time_incident)
#batch_1 = pd.DataFrame(a) 
batch_1.to_csv('batch1_frequency_based_travel_time') # from 1-284
In [ ]:
b = np.array(travel_time_incident)
batch_2 = pd.DataFrame(b)
batch_2.to_csv('batch2_frequency_based_travel_time') #from 284-1499 (171,480)
In [ ]:
def location_list(ranked_road_section, inspector_number, threshold):
    ranked_list = list(ranked_road_section.index)
    accident_list = list(ranked_road_section['Distance_to_line_meters'])
    road_id_dict = nx.get_edge_attributes(G, 'Road_section_id')
    loc = []
    acc = []
    for i in range(0,inspector_number):
        road_section = ranked_list[i]
        for key in road_id_dict:
            if road_section == road_id_dict[key]:
                location = key[0]
                if len(loc) == 0:
                    acc.append(accident_list[i])
                    loc.append(location)
                    print(location)
                elif len(loc) > 0:
                    for node in loc:
                        print(node)
                        # try:
                        distance1 = nx.shortest_path_length(G, source=location, target=node, 
                                                    weight='Min_travel_time_[s]')
                        distance2 = nx.shortest_path_length(G, source=node, target=location, 
                                                    weight='Min_travel_time_[s]')
                        if distance1 > 2*threshold*60 and distance1 > 2*threshold*60:
                            acc.append(accident_list[i])
                            loc.append(location)
                            print(location)
                #         except nx.NetworkXNoPath:
                #             break
                else:
                    break
            else:
                continue

    return(loc, accident_list)

def distance_between_inspector(loc_list, accident_list, threshold):
    s = len(loc_list)
    dist = np.zeros((s,s))
    too_close_nodes = {}
    too_close_time = {}
    for i in range(s):
        for j in range(s):
            if i == j:
                dist[i][j] = 99999999
            else:
                try:
                    distance = nx.shortest_path_length(G, source=loc_list[i], target=loc_list[j], 
                                                weight='Min_travel_time_[s]')

                except nx.NetworkXNoPath:
                    continue
                dist[i][j] = distance/60
            
                if dist[i][j] < 2*threshold:
                    too_close_time[loc_list[i], loc_list[j]] = dist[i][j]
                    too_close_nodes[loc_list[i]] = accident_list[i]
                    too_close_nodes[loc_list[j]] = accident_list[j]
        
    return(dist, too_close_nodes, too_close_time)
    
def inspector_locator_23_oct(num , G, grouped_section_id,):
    ranked_road_section = grouped_section_id.sort_values(by=['Distance_to_line_meters'], ascending=False)
    road_id_dict = nx.get_edge_attributes(G, 'Road_section_id')
    
In [ ]:
ranked_road_section = grouped_section_id.sort_values(by=['Distance_to_line_meters'], ascending=False)
num_of_accident = ranked_road_section['Distance_to_line_meters']
# ranked_section

loc,accident = location_list(ranked_road_section, 50,0)
print('initial loc = ',loc)
print('accident = ',accident)

dist, too_close_acc, too_close_time = distance_between_inspector(loc,accident, 18)
too_close_acc
# print('dist = ',dist)

# print('too close = \n',too_close)
In [ ]:
loc = list(ranked_road_section.iloc[0:30].index)
accident_list = list(ranked_road_section['Distance_to_line_meters'])

# for node in loc:
#     print(node)
#     try:
#         distance1 = nx.shortest_path_length(G, source=location, target=node, 
#                                                     weight='Min_travel_time_[s]')
#         distance2 = nx.shortest_path_length(G, source=node, target=location, 
#                                                     weight='Min_travel_time_[s]')
#         if distance1 > 2*threshold*60 and distance1 > 2*threshold*60:
#             acc.append(accident_list[i])
#             loc.append(location)
#             print(location)
#     except nx.NetworkXNoPath:
#         continue

loc
In [ ]:
# travel_time function
def travel_time_func(point1, point2, time='min'):
    """This function uses the information given in network X to return the travel time between two points.
        point1 and point2 should be tuples with the coordinates in longitude, latitude.
        if time = 'peak', the peak travel time is used. In all other cases the minimum travel time is used."""

    # Determine which travel times to use
    if time == 'peak':
        time_string = 'Peak_travel_time_[s]'
    else:
        time_string = 'Min_travel_time_[s]'

    # Change points to Dutch system
    p1_x, p1_y = WGS84toDutchRD(point1[0], point1[1]) # inspector
    p2_x, p2_y = WGS84toDutchRD(point2[0], point2[1]) # incident

    # Create numpy matrix from nodes
    A = np.array(list(G.nodes()))

    # Get node closest to each point
    dist_node1, index_node1 = spatial.KDTree(A).query([p1_x, p1_y])
    node1 = (A[index_node1][0], A[index_node1][1])

    dist_node2, index_node2 = spatial.KDTree(A).query([p2_x, p2_y])
    node2 = (A[index_node2][0], A[index_node2][1])

    # Get shortest path between nodes
    route = nx.shortest_path(G, node1, node2, time_string)
    travel_time = nx.shortest_path_length(G, node1, node2, time_string)

    return route, travel_time

point1 = DutchRDtoWGS84(87630.299, 454805.64)
point2 = DutchRDtoWGS84(91786.70511, 458824.87402)

print(point1)
print(point2)
travel_time_func(point1, point2)
In [ ]:
list(G.nodes())
In [ ]:
for inspector_num in range(100,121):
    ranked_road_section = grouped_section_id.sort_values(by=['Distance_to_line_meters'], ascending=False)
    z = list(ranked_road_section.iloc[0:inspector_num].index)
    network_nodes = list(G.nodes())

    loc = []
    for i in range(len(z)):
        road_section = z[i]
        for key in road_id_dict:
            if road_section == road_id_dict[key]:
                location = key[0]
                loc.append(location)


    # s = len(loc)
    # dist = np.zeros((s,s))
    # for i in range(s):
    #     for j in range(s):
    #         if i == j:
    #             continue
    #         else:
    #             try:
    #                 distance = nx.shortest_path_length(G, source=loc[i], target=loc[j], 
    #                                             weight='Min_travel_time_[s]')
    #             except nx.NetworkXNoPath:
    #                 continue

    #             dist[i][j] = distance/60

    s = len(loc)
    dist = np.zeros((s,len(network_nodes)))
    for i in range(s):
        for j in range(len(network_nodes)):
            if loc[i] == network_nodes[j]:
                continue
            else:
                try:
                    distance = nx.shortest_path_length(G, source=loc[i], target=network_nodes[j], 
                                                weight='Min_travel_time_[s]')
                except nx.NetworkXNoPath:
                    continue

                dist[i][j] = distance/60
            

    new_loc =[]
    for coor in loc:
        x,y = DutchRDtoWGS84(coor[0],coor[1])
        new = (x,y)
        new_loc.append(new)
In [ ]:
acc = nx.get_edge_attributes(G, 'Number_of_accident')
sum(acc.values())
In [ ]:
# Get a list of edge attributes
edge_attributes = set()
for u, v, data in G.edges(data=True):
    edge_attributes.update(data.keys())

# Print the list of edge attributes
print("Edge attributes in the graph:")
for attr in edge_attributes:
    print(attr)

G.get_edge_attributes
In [ ]:
s = len(new_loc)
dist = np.zeros((s,s))
np.shape(dist)
In [ ]:
m = folium.Map(location=[52.399190, 4.893658])

# gjson = folium.features.GeoJson(
#         gdf_incident_wgs84,
#     ).add_to(m)

for i in range(len(new_loc)):
    folium.Marker(location=[new_loc[i][0],new_loc[i][1]]).add_to(m)

m
In [ ]:
coords = target[0]
a = LineString(coords)
a
In [ ]:
len(list(G.nodes()))
In [ ]:
route_map = ox.plot_route_folium(G, target[0], crs='EPSG:28992')
In [ ]:
def radius(loc_distance, loc_dist, l, loc, threshold):
    if distance <= 2*60*threshold:
        loc.append(l)
        loc_distance.append(loc_dist)
In [ ]:
def road_section_sort(grouped_section_id, G, threshold):
    loc = []
    loc_dist = []
    ranked_road_section = grouped_section_id.sort_values(by=['Distance_to_line_meters'], ascending=False)
    road_id_dict = nx.get_edge_attributes(G, 'Road_section_id')


    for i in range(len(ranked_road_section)):
        road_section = ranked_road_section.index[i]

        for key in road_id_dict:
            if road_section == road_id_dict[key]:
                loca = key[0]
                break
            else:
                continue

        for l in loc:
            try:
                locations_distance = nx.shortest_path_length(G, source=location, target=l, 
                                                             weight='Min_travel_time_[s]')

            except nx.NetworkXNoPath:
                continue

            location_distance()